library(tidyverse)
## Warning: package 'dplyr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the training data; text columns come in as factors.
train <- read.csv("C:/Users/Jared/OneDrive/Desktop/Math 425/house-prices-advanced-regression-techniques-1/train.csv", stringsAsFactors = TRUE)

# Derive the modelling columns in a single pass.
train <- train %>%
  mutate(
    # Missing Alley values become their own "None" factor level.
    Alley = as.factor(replace_na(as.character(Alley), "None")),
    # Combined floor area: first floor + second floor + basement.
    TotalSF = X1stFlrSF + X2ndFlrSF + TotalBsmtSF,
    # 1/0 indicator for the three neighborhoods listed below.
    RichNbrhd = if_else(
      Neighborhood %in% c("StoneBr", "NridgHt", "NoRidge"), 1, 0
    )
  )


# Sale price by neighborhood; las=2 draws the axis labels perpendicular to the axes.
plot(SalePrice ~ Neighborhood, data=train, las=2)

# Regress sale price on the Neighborhood factor (one coefficient per non-baseline level).
lm1 <- lm(SalePrice ~ Neighborhood, data=train)
summary(lm1)
## 
## Call:
## lm(formula = SalePrice ~ Neighborhood, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -162271  -27552   -5324   19685  419705 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           194871      13097  14.879  < 2e-16 ***
## NeighborhoodBlueste   -57371      40367  -1.421 0.155463    
## NeighborhoodBrDale    -90377      18809  -4.805 1.71e-06 ***
## NeighborhoodBrkSide   -70037      14893  -4.703 2.81e-06 ***
## NeighborhoodClearCr    17695      16603   1.066 0.286721    
## NeighborhoodCollgCr     3095      13819   0.224 0.822820    
## NeighborhoodCrawfor    15754      15123   1.042 0.297712    
## NeighborhoodEdwards   -66651      14166  -4.705 2.78e-06 ***
## NeighborhoodGilbert    -2016      14437  -0.140 0.888944    
## NeighborhoodIDOTRR    -94747      15822  -5.988 2.67e-09 ***
## NeighborhoodMeadowV   -96294      18522  -5.199 2.29e-07 ***
## NeighborhoodMitchel   -38601      15200  -2.540 0.011204 *  
## NeighborhoodNAmes     -49024      13582  -3.609 0.000318 ***
## NeighborhoodNoRidge   140424      15577   9.015  < 2e-16 ***
## NeighborhoodNPkVill   -52176      22260  -2.344 0.019217 *  
## NeighborhoodNridgHt   121400      14470   8.390  < 2e-16 ***
## NeighborhoodNWAmes     -5821      14542  -0.400 0.689011    
## NeighborhoodOldTown   -66646      14047  -4.744 2.30e-06 ***
## NeighborhoodSawyer    -58078      14523  -3.999 6.69e-05 ***
## NeighborhoodSawyerW    -8315      14864  -0.559 0.575974    
## NeighborhoodSomerst    30509      14333   2.129 0.033456 *  
## NeighborhoodStoneBr   115628      16975   6.812 1.42e-11 ***
## NeighborhoodSWISU     -52280      16975  -3.080 0.002111 ** 
## NeighborhoodTimber     47377      15756   3.007 0.002686 ** 
## NeighborhoodVeenker    43902      20895   2.101 0.035810 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 54000 on 1435 degrees of freedom
## Multiple R-squared:  0.5456, Adjusted R-squared:  0.538 
## F-statistic: 71.78 on 24 and 1435 DF,  p-value: < 2.2e-16
# Simple linear regression of sale price on first-floor square footage.
lm.1stflr <- lm(SalePrice ~ X1stFlrSF, data=train)
summary(lm.1stflr)
## 
## Call:
## lm(formula = SalePrice ~ X1stFlrSF, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -460330  -36494  -13164   36291  414547 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 36173.447   5245.728   6.896 7.95e-12 ***
## X1stFlrSF     124.501      4.282  29.078  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63220 on 1458 degrees of freedom
## Multiple R-squared:  0.3671, Adjusted R-squared:  0.3666 
## F-statistic: 845.5 on 1 and 1458 DF,  p-value: < 2.2e-16
# Scatterplot of sale price against first-floor square footage.
plot(SalePrice ~ X1stFlrSF, data=train)

# Regress sale price on the Alley factor (levels Grvl / None / Pave after the recode at load time).
lm.alley <- lm(SalePrice ~ Alley, data=train)
summary(lm.alley)
## 
## Call:
## lm(formula = SalePrice ~ Alley, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -148552  -50952  -15636   31581  571548 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   122219      11127  10.984  < 2e-16 ***
## AlleyNone      61233      11329   5.405 7.56e-08 ***
## AlleyPave      45782      16577   2.762  0.00582 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 78680 on 1457 degrees of freedom
## Multiple R-squared:  0.02041,    Adjusted R-squared:  0.01906 
## F-statistic: 15.18 on 2 and 1457 DF,  p-value: 2.996e-07
# Sale price by alley type.
plot(SalePrice ~ Alley, data=train)

# Number of homes in each Alley level.
table(train$Alley)
## 
## Grvl None Pave 
##   50 1369   41
#View(train[,c("SalePrice","Alley")])

# Regress sale price on Fence. Fence still contains NAs (it was not recoded
# the way Alley was), so lm() drops those rows before fitting.
lm.fence <- lm(SalePrice ~ Fence, data=train)
summary(lm.fence)
## 
## Call:
## lm(formula = SalePrice ~ Fence, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -108751  -27927   -9751   10714  596249 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   178928       7921  22.590  < 2e-16 ***
## FenceGdWo     -38548      11458  -3.364 0.000876 ***
## FenceMnPrv    -30176       9291  -3.248 0.001305 ** 
## FenceMnWw     -44641      19981  -2.234 0.026270 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60840 on 277 degrees of freedom
##   (1179 observations deleted due to missingness)
## Multiple R-squared:  0.05086,    Adjusted R-squared:  0.04058 
## F-statistic: 4.948 on 3 and 277 DF,  p-value: 0.002313
# Count missing values per column. is.na() on a data frame returns a logical
# matrix, so colSums() gives the per-column NA totals directly, without
# apply() first coercing the whole data frame to a character matrix.
colSums(is.na(train)) #count missing values
##            Id    MSSubClass      MSZoning   LotFrontage       LotArea 
##             0             0             0           259             0 
##        Street         Alley      LotShape   LandContour     Utilities 
##             0             0             0             0             0 
##     LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
##             0             0             0             0             0 
##      BldgType    HouseStyle   OverallQual   OverallCond     YearBuilt 
##             0             0             0             0             0 
##  YearRemodAdd     RoofStyle      RoofMatl   Exterior1st   Exterior2nd 
##             0             0             0             0             0 
##    MasVnrType    MasVnrArea     ExterQual     ExterCond    Foundation 
##             8             8             0             0             0 
##      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1    BsmtFinSF1 
##            37            37            38            37             0 
##  BsmtFinType2    BsmtFinSF2     BsmtUnfSF   TotalBsmtSF       Heating 
##            38             0             0             0             0 
##     HeatingQC    CentralAir    Electrical     X1stFlrSF     X2ndFlrSF 
##             0             0             1             0             0 
##  LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
##             0             0             0             0             0 
##      HalfBath  BedroomAbvGr  KitchenAbvGr   KitchenQual  TotRmsAbvGrd 
##             0             0             0             0             0 
##    Functional    Fireplaces   FireplaceQu    GarageType   GarageYrBlt 
##             0             0           690            81            81 
##  GarageFinish    GarageCars    GarageArea    GarageQual    GarageCond 
##            81             0             0            81            81 
##    PavedDrive    WoodDeckSF   OpenPorchSF EnclosedPorch    X3SsnPorch 
##             0             0             0             0             0 
##   ScreenPorch      PoolArea        PoolQC         Fence   MiscFeature 
##             0             0          1453          1179          1406 
##       MiscVal        MoSold        YrSold      SaleType SaleCondition 
##             0             0             0             0             0 
##     SalePrice       TotalSF     RichNbrhd 
##             0             0             0
# Refit of the first-floor model (same formula and data as the earlier lm.1stflr).
lm.1stflr <- lm(SalePrice ~ X1stFlrSF, data=train)
summary(lm.1stflr)
## 
## Call:
## lm(formula = SalePrice ~ X1stFlrSF, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -460330  -36494  -13164   36291  414547 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 36173.447   5245.728   6.896 7.95e-12 ***
## X1stFlrSF     124.501      4.282  29.078  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63220 on 1458 degrees of freedom
## Multiple R-squared:  0.3671, Adjusted R-squared:  0.3666 
## F-statistic: 845.5 on 1 and 1458 DF,  p-value: < 2.2e-16
# Simple linear regression of sale price on second-floor square footage.
lm.2ndflr <- lm(SalePrice ~ X2ndFlrSF, data=train)
summary(lm.2ndflr)
## 
## Call:
## lm(formula = SalePrice ~ X2ndFlrSF, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -159576  -45756  -17756   27144  485454 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.608e+05  2.518e+03   63.84   <2e-16 ***
## X2ndFlrSF   5.812e+01  4.517e+00   12.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 75310 on 1458 degrees of freedom
## Multiple R-squared:  0.102,  Adjusted R-squared:  0.1014 
## F-statistic: 165.6 on 1 and 1458 DF,  p-value: < 2.2e-16
# Simple linear regression of sale price on total basement square footage.
lm.basement <- lm(SalePrice ~ TotalBsmtSF, data=train)
summary(lm.basement)
## 
## Call:
## lm(formula = SalePrice ~ TotalBsmtSF, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -582310  -39612  -14095   33315  420018 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 63430.629   4286.892   14.80   <2e-16 ***
## TotalBsmtSF   111.110      3.745   29.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62750 on 1458 degrees of freedom
## Multiple R-squared:  0.3765, Adjusted R-squared:  0.3761 
## F-statistic: 880.3 on 1 and 1458 DF,  p-value: < 2.2e-16
# Put all three square-footage predictors together in one multiple regression model.

lm.sqft.all <- lm(SalePrice ~ X1stFlrSF + X2ndFlrSF + TotalBsmtSF, data=train)
summary(lm.sqft.all)
## 
## Call:
## lm(formula = SalePrice ~ X1stFlrSF + X2ndFlrSF + TotalBsmtSF, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -697622  -21631    -366   20427  276895 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -14033.593   4492.048  -3.124  0.00182 ** 
## X1stFlrSF       81.786      5.861  13.955  < 2e-16 ***
## X2ndFlrSF       84.493      3.021  27.973  < 2e-16 ***
## TotalBsmtSF     66.718      5.136  12.990  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 49310 on 1456 degrees of freedom
## Multiple R-squared:  0.6155, Adjusted R-squared:  0.6147 
## F-statistic: 776.8 on 3 and 1456 DF,  p-value: < 2.2e-16
# Or use mutate to create a single "TotalSF" variable. That allows a simple linear regression model that is nearly as powerful as the three-predictor model, but far easier to graph and interpret.

# TotalSF was already created when the data were loaded; recomputing it here yields the same column.
train <- train %>%
  mutate(TotalSF = X1stFlrSF + X2ndFlrSF + TotalBsmtSF)

lm.sqft <- lm(SalePrice ~ TotalSF, data=train)
summary(lm.sqft)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -715562  -22190    -669   20711  269879 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -13219.802   4251.189   -3.11  0.00191 ** 
## TotalSF         75.628      1.577   47.95  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 49510 on 1458 degrees of freedom
## Multiple R-squared:  0.6119, Adjusted R-squared:  0.6117 
## F-statistic:  2299 on 1 and 1458 DF,  p-value: < 2.2e-16
# Scatterplot of sale price against total square footage.
plot(SalePrice ~ TotalSF, data=train)

# Interaction model: the RichNbrhd indicator shifts both the intercept and the TotalSF slope.
lm.sqft.rich <- lm(SalePrice ~ TotalSF + RichNbrhd + TotalSF:RichNbrhd, data=train)
summary(lm.sqft.rich)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + RichNbrhd + TotalSF:RichNbrhd, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -532130  -21090    -866   20892  213493 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.704e+04  4.078e+03   6.630 4.73e-11 ***
## TotalSF            5.659e+01  1.594e+00  35.504  < 2e-16 ***
## RichNbrhd         -1.207e+05  1.695e+04  -7.122 1.66e-12 ***
## TotalSF:RichNbrhd  5.735e+01  4.696e+00  12.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42500 on 1456 degrees of freedom
## Multiple R-squared:  0.7143, Adjusted R-squared:  0.7137 
## F-statistic:  1214 on 3 and 1456 DF,  p-value: < 2.2e-16
# Same interaction model as lm.sqft.rich, but with log(SalePrice) as the response.
lm.sqft.rich.log <- lm(log(SalePrice) ~ TotalSF + RichNbrhd + TotalSF:RichNbrhd, data=train)
# Summarise the log-scale model just fitted (not lm.sqft.rich).
# NOTE: previously knitted output shown after this call came from
# summary(lm.sqft.rich) and must be regenerated.
summary(lm.sqft.rich.log)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + RichNbrhd + TotalSF:RichNbrhd, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -532130  -21090    -866   20892  213493 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.704e+04  4.078e+03   6.630 4.73e-11 ***
## TotalSF            5.659e+01  1.594e+00  35.504  < 2e-16 ***
## RichNbrhd         -1.207e+05  1.695e+04  -7.122 1.66e-12 ***
## TotalSF:RichNbrhd  5.735e+01  4.696e+00  12.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42500 on 1456 degrees of freedom
## Multiple R-squared:  0.7143, Adjusted R-squared:  0.7137 
## F-statistic:  1214 on 3 and 1456 DF,  p-value: < 2.2e-16
# Back-transform the coefficients of the log(SalePrice) model to the multiplicative scale.
exp(coef(lm.sqft.rich.log))
##       (Intercept)           TotalSF         RichNbrhd TotalSF:RichNbrhd 
##      6.837410e+04      1.000337e+00      1.374876e+00      9.999872e-01
# The exponentiated coefficients are the multipliers for the increase in average Sale Price.
# So 1.000337 means each additional square foot makes the predicted value 1.000337 times as large (when RichNbrhd = 0, since the model includes a TotalSF:RichNbrhd interaction). Or, 1,000 square feet of addition makes the home about 40% greater in value.

# Multiplier for 1,000 additional square feet: exponentiate 1000 times the TotalSF coefficient.
exp(coef(lm.sqft.rich.log)[2]*1000)
## TotalSF 
## 1.40051
# Two quantitative predictors plus their interaction: the TotalSF slope changes with LotArea.
house3d <- lm(SalePrice ~ TotalSF + LotArea + TotalSF:LotArea, data=train)
summary(house3d)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + LotArea + TotalSF:LotArea, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -262819  -22740     638   22608  280521 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.981e+04  5.702e+03  -12.24   <2e-16 ***
## TotalSF          9.266e+01  2.018e+00   45.91   <2e-16 ***
## LotArea          4.799e+00  3.551e-01   13.52   <2e-16 ***
## TotalSF:LotArea -1.274e-03  9.184e-05  -13.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46520 on 1456 degrees of freedom
## Multiple R-squared:  0.6578, Adjusted R-squared:  0.657 
## F-statistic: 932.8 on 3 and 1456 DF,  p-value: < 2.2e-16
## For houses with a lot area of 1300 sf, each additional sf of the house (TotalSF) adds b[2]+b[4]*1300
## or $91 to the predicted value of the home.

## For houses with a lot area of 215245 sf, each additional sf of the house (TotalSF) drops the predicted value by about $181.51, b[2]+b[4]*215245

## To embed the 3d-scatterplot inside of your html document is harder.
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Set up the axes: 100 evenly spaced values across the observed range of each predictor
axis_x <- seq(min(train$TotalSF), max(train$TotalSF), length.out = 100)
axis_y <- seq(min(train$LotArea), max(train$LotArea), length.out = 100)

# Evaluate the fitted house3d model at every (TotalSF, LotArea) grid point.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable variable.
air_surface <- expand.grid(TotalSF = axis_x, LotArea = axis_y, KEEP.OUT.ATTRS = FALSE)
air_surface$Z <- predict(house3d, newdata = air_surface)
# Reshape long -> wide: one row per LotArea (y), one column per TotalSF (x)
air_surface <- acast(air_surface, LotArea ~ TotalSF, value.var = "Z") #y ~ x

# Create the 3D scatterplot of the data and overlay the fitted regression surface.
plot_ly(train,
        x = ~TotalSF,
        y = ~LotArea,
        z = ~SalePrice,
        type = "scatter3d",
        mode = "markers") %>%
  # inherit = FALSE stops the surface trace from picking up the scatter-only
  # `mode` attribute set in plot_ly(), which is what triggered the
  # "'surface' objects don't have these attributes: 'mode'" warning.
  # x, y and z are all given explicitly, so nothing else needs inheriting.
  add_trace(z = air_surface,
            x = axis_x,
            y = axis_y,
            type = "surface",
            inherit = FALSE)
## Warning: 'surface' objects don't have these attributes: 'mode'
## Valid attributes include:
## '_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Base scatterplot with a wide y-range so that the fitted lines added afterwards stay in view.
plot(SalePrice ~ TotalSF, data=train, ylim=c(-1000000,1000000))

# Coefficients of house3d, in order: intercept, TotalSF, LotArea, TotalSF:LotArea.
b <- coef(house3d)
b
##     (Intercept)         TotalSF         LotArea TotalSF:LotArea 
##   -6.981395e+04    9.266018e+01    4.798759e+00   -1.273770e-03
# Add to the current plot the fitted SalePrice-vs-TotalSF line for one fixed
# value of LotArea.
#
# LotArea: lot area at which the fitted surface is sliced.
# col:     line colour. The old default `col=col` referred to itself, so any
#          call that omitted `col` failed with a recursive-default error.
# coefs:   coefficients in the order (Intercept), TotalSF, LotArea,
#          TotalSF:LotArea. Defaults to the global `b`, so existing calls
#          behave exactly as before.
# Returns (invisibly) the list of x/y points drawn by curve().
drawit <- function(LotArea, col = "black", coefs = b) {
  curve(coefs[1] + coefs[2] * TotalSF + coefs[3] * LotArea +
          coefs[4] * TotalSF * LotArea,
        add = TRUE, col = col, xname = "TotalSF")
}

# One red line per grid value of LotArea.
for (la in axis_y) drawit(LotArea = la, col = "red")

# Then two specific lot areas, one red and one blue.
drawit(LotArea = 1300, col = "red")
drawit(LotArea = 215245, col = "blue")

# Larger additive model: three area measures, the Alley factor, bathroom count and screen-porch area.
househd <- lm(SalePrice ~ TotalSF + LotArea + GarageArea + Alley + FullBath + ScreenPorch, data=train)
summary(househd)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + LotArea + GarageArea + Alley + 
##     FullBath + ScreenPorch, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -612807  -18993   -1339   17487  288849 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.564e+04  7.237e+03  -7.688 2.73e-14 ***
## TotalSF      5.262e+01  2.045e+00  25.732  < 2e-16 ***
## LotArea      2.399e-01  1.251e-01   1.918 0.055308 .  
## GarageArea   9.187e+01  6.756e+00  13.598  < 2e-16 ***
## AlleyNone    2.431e+04  6.546e+03   3.714 0.000211 ***
## AlleyPave    1.800e+04  9.573e+03   1.880 0.060323 .  
## FullBath     2.004e+04  2.669e+03   7.510 1.03e-13 ***
## ScreenPorch  5.459e+01  2.142e+01   2.549 0.010920 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45170 on 1452 degrees of freedom
## Multiple R-squared:  0.6782, Adjusted R-squared:  0.6767 
## F-statistic: 437.2 on 7 and 1452 DF,  p-value: < 2.2e-16
set.seed(121) # fix the RNG so the train/test split is reproducible

num_rows <- 1000 #1460 total
# seq_len() yields the same indices as 1:nrow(train), but gives an empty
# vector rather than c(1, 0) when there are no rows.
keep <- sample(seq_len(nrow(train)), num_rows)

mytrain <- train[keep, ] #Use this in the lm(..., data=mytrain) it is like "rbdata"

mytest <- train[-keep, ] #Use this in the predict(..., newdata=mytest) it is like "rbdata2"


# Refit househd on the training rows only so it can be checked against mytest.
househd <- lm(SalePrice ~ TotalSF + LotArea + GarageArea + Alley + FullBath + ScreenPorch, data=mytrain)
summary(househd)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + LotArea + GarageArea + Alley + 
##     FullBath + ScreenPorch, data = mytrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -375747  -19694   -1284   20342  260709 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.604e+04  7.950e+03  -8.307 3.20e-16 ***
## TotalSF      6.043e+01  2.438e+00  24.787  < 2e-16 ***
## LotArea      1.975e-01  1.786e-01   1.106  0.26908    
## GarageArea   1.027e+02  8.024e+00  12.799  < 2e-16 ***
## AlleyNone    2.843e+04  7.045e+03   4.036 5.87e-05 ***
## AlleyPave    2.522e+04  1.071e+04   2.354  0.01874 *  
## FullBath     9.066e+03  3.152e+03   2.876  0.00411 ** 
## ScreenPorch  5.255e+01  2.607e+01   2.016  0.04407 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42900 on 992 degrees of freedom
## Multiple R-squared:  0.7228, Adjusted R-squared:  0.7209 
## F-statistic: 369.5 on 7 and 992 DF,  p-value: < 2.2e-16
# Refit the interaction model on the training rows only.
house3d <- lm(SalePrice ~ TotalSF + LotArea + TotalSF:LotArea, data=mytrain)

# Predicted sale prices for the held-out rows.
yh_hd <- predict(househd, newdata=mytest)
yh_3d <- predict(house3d, newdata=mytest)

# Total sum of squares of the held-out sale prices about their own mean.
ybar <- mean(mytest$SalePrice)

SSTO <- sum( (mytest$SalePrice - ybar)^2 )

# Sum of squared prediction errors on the held-out rows, per model.
SSE_hd <- sum( (mytest$SalePrice - yh_hd)^2 )
SSE_3d <- sum( (mytest$SalePrice - yh_3d)^2 )

# Validation R-squared.
rs_hd <- 1 - SSE_hd/SSTO
rs_3d <- 1 - SSE_3d/SSTO

n <- nrow(mytest)
# Number of estimated coefficients in each model. length() on the lm object
# itself counts the components of the fitted-model list, not the parameters,
# so the count is taken from coef().
# NOTE: previously knitted rsa_hd / rsa_3d values were computed with the
# component count and must be regenerated.
p_3d <- length(coef(house3d))
p_hd <- length(coef(househd))

# Validation adjusted R-squared.
rsa_hd <- 1 - (n-1)/(n-p_hd)*SSE_hd/SSTO
rsa_3d <- 1 - (n-1)/(n-p_3d)*SSE_3d/SSTO

# Adjusted R-squared of househd computed on the held-out rows (mytest).
rsa_hd
## [1] 0.5286034
# In-sample summary of househd (fit on mytrain), for comparison with rsa_hd.
summary(househd)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + LotArea + GarageArea + Alley + 
##     FullBath + ScreenPorch, data = mytrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -375747  -19694   -1284   20342  260709 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.604e+04  7.950e+03  -8.307 3.20e-16 ***
## TotalSF      6.043e+01  2.438e+00  24.787  < 2e-16 ***
## LotArea      1.975e-01  1.786e-01   1.106  0.26908    
## GarageArea   1.027e+02  8.024e+00  12.799  < 2e-16 ***
## AlleyNone    2.843e+04  7.045e+03   4.036 5.87e-05 ***
## AlleyPave    2.522e+04  1.071e+04   2.354  0.01874 *  
## FullBath     9.066e+03  3.152e+03   2.876  0.00411 ** 
## ScreenPorch  5.255e+01  2.607e+01   2.016  0.04407 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42900 on 992 degrees of freedom
## Multiple R-squared:  0.7228, Adjusted R-squared:  0.7209 
## F-statistic: 369.5 on 7 and 992 DF,  p-value: < 2.2e-16
# Adjusted R-squared of house3d computed on the held-out rows (mytest).
rsa_3d
## [1] 0.5761344
# In-sample summary of house3d (fit on mytrain), for comparison with rsa_3d.
summary(house3d)
## 
## Call:
## lm(formula = SalePrice ~ TotalSF + LotArea + TotalSF:LotArea, 
##     data = mytrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -347259  -22001     506   21638  257755 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -4.900e+04  8.896e+03  -5.508 4.61e-08 ***
## TotalSF          8.750e+01  3.120e+00  28.040  < 2e-16 ***
## LotArea          2.339e+00  7.516e-01   3.113  0.00191 ** 
## TotalSF:LotArea -6.272e-04  2.235e-04  -2.806  0.00511 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46890 on 996 degrees of freedom
## Multiple R-squared:  0.6675, Adjusted R-squared:  0.6665 
## F-statistic: 666.6 on 3 and 996 DF,  p-value: < 2.2e-16